function [betap_dist,betap_grid,osaka_moments_data] = data_load_osaka(number_grid,number_educ,data_dir, paper_dir)

disp('Running script to load and transform Osaka Preference Parameter Survey data (2010 wave)')

% As discussed in the paper Appendix, we assume the log-utility specification to
% back out annual discount factors (see Stata file master_osaka_betas.do, available upon request). 

data = readtable([data_dir,'osaka_2010_final.txt']);
cd(paper_dir)
data = table2array(data);
%replace missing data with nan
data(data == -9) = NaN;

% This is the order of the relevant variables loaded in from the Stata do-file called "master_osaka_betas.do":
% 1) survey respondent individual identifier
% 2) the respondent's annualized discount rate (r) based on the $100 delayed gratification question, and assuming log-utility
% 3) the respondent's annualized discount factor (beta = 1/(1+r)) based on the $100 delayed gratification question, and assuming log-utility
% 4) the respondent's age in 2010 (ranges from 18 to 99)
% 5) the respondents educational attainment

% Load data indices
col_long.id = 1;            % individual identifier
col_long.disc_rate = 2;     % discount rate r
col_long.disc_factor = 3;   % discount factor beta = 1/(1+r)
col_long.age = 4;           % age of the respondent
col_long.educ_years = 5;    % years of schooling of the respondent

betas = data(:,col_long.disc_factor); % use his vector of elicited annual discount factors for our analysis
age = data(:,col_long.age);
educ_years = data(:,col_long.educ_years);

N = size(data,1);

% education variables
% DROP GRADE SCHOOL = code 10
hs = nan(N,1);
hs(educ_years<=12 & educ_years>=11) = 1;
hs(educ_years>12) = 0;

sc = nan(N,1);
% 13 = some college, no degree
% 14 = associate's degree, 2 year college
sc(educ_years == 13 | educ_years == 14) = 1;
sc(educ_years < 13 | educ_years > 14) = 0;

coll = nan(N,1);
% 16 = graduated bachelor's degree, 4 years
coll(educ_years == 16) = 1;
coll(educ_years < 16 | educ_years > 16) = 0;

grad = nan(N,1);
% 17 = some postgrad, no degree
% 18 = master's
% 19 = doctoral
grad(educ_years >= 17) = 1;
grad(educ_years < 17) = 0;

%% Now define probability distributions by educational type, conditional on number of specified gridpoints

% Clean set of individuals first
% default: use the $100 question and look at discount FACTORs
% default: use entire sample of all individuals (including non-married,
% without kids etc)
% only drop those for whom we don't observe educational attainment levels

dropobs = zeros(size(data,1),1);
dropobs(educ_years==10) = 1; % drop grade school individuals
dropobs(isnan(educ_years)) = 1; % drop if education not observed
dropobs(age<25 | age > 65) = 1; % only keep ages 25-65, also ensure they've finalized education
dropobs(isnan(betas)) = 1; % only keep those who've answered the $100 questions properly (already cleaned in Stata to drop irrational individuals)

% Drop incomplete observations:
betas(dropobs==1) = NaN; 
hs(dropobs==1) = NaN;
sc(dropobs==1) = NaN;
coll(dropobs==1) = NaN;
grad(dropobs==1) = NaN;

% Calculate some numbers for the paper: average discount factor by
% educational attainment level, for 3 groups: 1) high school or les, 2) some
% college or college, 3)graduate
% % (Remove comment signs if needed)

% check = hs+sc+coll+grad;
% disp('Osaka survey - final sample size:')
% sum(check==1)
% 
% disp('1) High school or less: sample size and mean')
% nansum(betas(hs==1)>=0)
% nanmean(betas(hs==1))
% disp('2) Some college or college: sample size and mean')
% nansum(betas(sc==1|coll==1)>=0)
% nanmean(betas(hs==1|coll==1))
% disp('3) Graduates: sample size and mean')
% nansum(betas(grad==1)>=0)
% nanmean(betas(grad==1))

% Note: the rest of the code assumes that the number of education categories for the
% parental discount factor grid (number_educ) is set equal to 3, and the
% number of grid points number_grid) is also equal to 3.

betas_quantiles = [0 0.33 0.67 1];
cutpoints = [0 quantile(betas,betas_quantiles(2:end-1)) 1];

% Give error warning if we have some repeated grid points (due to too much
% mass being concentrated there)
if length(unique(cutpoints)) < length(cutpoints)
    error('Grid points for beta_p are not unique!')
end

% Define discretized grid, using averages within each quantile

betap_grid = nan(number_grid,1);
for bb=2:length(cutpoints)
    if bb == length(cutpoints) % last interval
        betap_grid(bb-1) = mean(betas(betas>=cutpoints(bb-1)& betas<=cutpoints(bb)));
    else
        betap_grid(bb-1) = mean(betas(betas>=cutpoints(bb-1)& betas<cutpoints(bb)));
    end
end
% disp('The grid points for the parental discount factor distribution are:')
% betap_grid

% Define prob distributions based on number of educational types we want
betap_dist = nan(number_grid,number_educ); % beta levels x education levels
tmpp = NaN(size(betas,1),number_educ);
% Note: we group some college and college together, to be consistent with Steinberg data
tmpp(:,1) = hs; % high school or less
tmpp(:,2) = sc+coll; % some college or college
tmpp(:,3) = grad; % graduate degree

% Calculate data moments to put in paper

osaka_moments_data = NaN(number_educ+1, 2); % rows = by SES cat / all hh, columns = mean/stdev
osaka_moments_data(end,:) = [nanmean(betas) nanstd(betas)];

for j = 1:number_educ
    Ntot = sum(~isnan(betas(tmpp(:,j)==1)));
    osaka_moments_data(j,:) = [nanmean(betas(tmpp(:,j)==1)) nanstd(betas(tmpp(:,j)==1))];

    for bb=2:length(cutpoints)
        if bb == length(cutpoints) % last interval
            betap_dist(bb-1,j) = nansum(betas(tmpp(:,j)==1)>=cutpoints(bb-1)& betas(tmpp(:,j)==1)<=cutpoints(bb))/Ntot;
        else
            betap_dist(bb-1,j) = nansum(betas(tmpp(:,j)==1)>=cutpoints(bb-1)& betas(tmpp(:,j)==1)<cutpoints(bb))/Ntot;
        end
    end
%     disp(['Note: Osaka sample size for education group ',num2str(j),' = ',num2str(Ntot)]);
end

% % Check the approximated mean vs. the actual mean within each educ bin
% disp('Approximate mean within each education bin:')
% sum(betap_dist.*repmat(betap_grid,1,number_educ))
% disp('Actual mean within each education bin:')
% for j=1:number_educ
%     nanmean(betas(tmpp(:,j)==1))
% end
% disp('The discretized grid points for beta_p are:')
% betap_grid
% % Report conditional distribution (see also paper Appendix C.3)
% disp('The conditional distribution within each tercile is:')
% betap_dist
% % Report data moments of parental discount factors (see also paper Table 12)
% osaka_moments_data

% Make histograms of beta_avg for every education level

betamin = min(betas)-0.05;
nbins = 5;
% 1) High school or less
h1a = figure;
set(h1a,'visible','off');
histogram(betas(hs==1),nbins,'Normalization','probability'); 
% title('High school or less'); 
xlabel('\beta'); ylabel('Freq.'); xlim([betamin 1.05]); ylim([0 0.50]);
saveas(h1a,'fig_osaka_hist_betaavg_educ1', 'png')
saveas(h1a,'fig_osaka_hist_betaavg_educ1', 'eps')
close all; % close figures

% 2) Some college or college
h2a = figure;
set(h2a,'visible','off');
histogram(betas(sc==1|coll==1),nbins,'Normalization','probability'); 
% title('(Some) college');
xlabel('\beta'); ylabel('Freq.'); xlim([betamin 1.05]); ylim([0 0.50]);
saveas(h2a,'fig_osaka_hist_betaavg_educ2', 'png')
saveas(h2a,'fig_osaka_hist_betaavg_educ2', 'eps')
close all; % close figures

% 3) Graduate degree
h3a = figure;
set(h3a,'visible','off');
histogram(betas(grad==1),nbins,'Normalization','probability'); 
% title('Graduate');
xlabel('\beta'); ylabel('Freq.'); xlim([betamin 1.05]); ylim([0 0.50]);
saveas(h3a,'fig_osaka_hist_betaavg_educ3', 'png')
saveas(h3a,'fig_osaka_hist_betaavg_educ3', 'eps')

close all; % close figures

end % function